library(readr)
# Load the rental listings; readr guesses each column type from the file.
house_data <- readr::read_csv(file = "Dataset/Rent_House.csv")
## Rows: 10677 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): floor, animal, furniture
## dbl (8): area, rooms, bathroom, parking spaces, hoa, rent amount, property t...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact overview of every column (type and first values).
utils::str(object = house_data)
## spc_tbl_ [10,677 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ area : num [1:10677] 700 350 486 318 80 900 540 425 200 350 ...
## $ rooms : num [1:10677] 4 3 8 4 2 3 6 4 4 3 ...
## $ bathroom : num [1:10677] 7 3 4 3 1 4 8 4 3 3 ...
## $ parking spaces: num [1:10677] 8 3 6 0 1 8 3 0 0 3 ...
## $ floor : chr [1:10677] "-" "-" "-" "-" ...
## $ animal : chr [1:10677] "acept" "acept" "acept" "acept" ...
## $ furniture : chr [1:10677] "not furnished" "not furnished" "not furnished" "not furnished" ...
## $ hoa : num [1:10677] 0 0 0 0 875 0 0 0 0 0 ...
## $ rent amount : num [1:10677] 45000 30000 25000 19000 24000 20000 15000 15000 15000 15000 ...
## $ property tax : num [1:10677] 8750 560 2200 384 0 ...
## $ fire insurance: num [1:10677] 677 451 376 338 305 301 267 267 267 267 ...
## - attr(*, "spec")=
## .. cols(
## .. area = col_double(),
## .. rooms = col_double(),
## .. bathroom = col_double(),
## .. `parking spaces` = col_double(),
## .. floor = col_character(),
## .. animal = col_character(),
## .. furniture = col_character(),
## .. hoa = col_double(),
## .. `rent amount` = col_double(),
## .. `property tax` = col_double(),
## .. `fire insurance` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Min-max rescale the columns at positions 8 to 11 (hoa, rent amount,
# property tax, fire insurance) so that each one spans 0 to 1.
normalized_house_data <- as.data.frame(
  lapply(house_data[, 8:11], function(column) {
    # bounds[1] is the smallest and bounds[2] the largest non-missing value.
    bounds <- range(column, na.rm = TRUE)
    (column - bounds[1]) / (bounds[2] - bounds[1])
  })
)
# Side-by-side box plots of the rescaled columns.
boxplot(normalized_house_data)

# Star (glyph) plot for a reproducible random sample of listings.
set.seed(100)
# Sample at most 100 rows so the call also works on smaller data sets.
random_indices <- sample(
  nrow(normalized_house_data),
  min(100, nrow(normalized_house_data))
)
# Keep the pet policy next to the rescaled columns; `[[` extracts column 6
# (`animal`) as a plain character vector instead of a one-column tibble.
house_data_normalized <- cbind(normalized_house_data, animal = house_data[[6]])
# Only the numeric columns are drawn as rays, and the pet policy of each
# sampled row labels its glyph. The previous call passed
# `normalized_house_data$animals`, a column that does not exist (NULL), so no
# labels were drawn and the character column was coerced into an extra ray.
stars(
  house_data_normalized[random_indices, names(normalized_house_data)],
  labels = house_data_normalized$animal[random_indices]
)

# Dive Deeper into Visualization Techniques
# NOTE(review): attach() is kept only because code outside this section may
# rely on bare column names; nothing in this section depends on it.
attach(house_data)
#Plots
# Row number plus the eight numeric columns (positions 1-4 and 8-11).
houses_prices <- data.frame(as.numeric(rownames(house_data)),house_data[,c(1:4, 8:11)])
# One display label per selected column, in the order of c(1:4, 8:11).
# "floors" was removed: `floor` is column 5 and is not selected, so the vector
# previously held nine labels for eight panels.
labs.diagonal <- c(
  "Area", "Rooms", "Number of Bathrooms", "Parking Space",
  "HOA", "Rent Price", "Property Tax", "fire insurance"
)
# Columns are read from house_data explicitly instead of through attach().
# A single symbol is used: the recycled pch = c(16, 1) alternated symbols by
# row position, which suggested groups that do not exist.
plot(
  house_data$area, house_data$`rent amount`,
  xlab = "Area", ylab = "Rent Price", pch = 16
)

# Scatter-plot matrix of the numeric columns, labelled on the diagonal.
pairs(house_data[, c(1:4, 8:11)], labels = labs.diagonal)

library(GGally)
## 载入需要的程辑包:ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Scatter-plot matrix of the eight numeric columns, coloured by pet policy.
ggscatmat(house_data, columns = c(1:4, 8:11), color = "animal")

# Same matrix coloured by furnishing status. GGally is already attached by the
# library(GGally) call above, so the repeated call was removed.
ggscatmat(house_data, columns = c(1:4, 8:11), color = "furniture")

# load packages
library(lattice)
library(ggplot2)
library(ggridges)
library(ggvis)
##
## 载入程辑包:'ggvis'
## The following object is masked from 'package:ggplot2':
##
## resolution
library(ggthemes)
library(cowplot)
##
## 载入程辑包:'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
library(gapminder)
## Warning: 程辑包'gapminder'是用R版本4.1.2 来建造的
library(gganimate)
## Warning: 程辑包'gganimate'是用R版本4.1.2 来建造的
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
##
## 载入程辑包:'gganimate'
## The following object is masked from 'package:ggvis':
##
## view_static
library(dplyr)
##
## 载入程辑包:'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: 程辑包'tidyverse'是用R版本4.1.2 来建造的
## Warning: 程辑包'tibble'是用R版本4.1.2 来建造的
## Warning: 程辑包'forcats'是用R版本4.1.2 来建造的
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggvis::resolution() masks ggplot2::resolution()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(grid)
library(gridExtra)
##
## 载入程辑包:'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(RColorBrewer)
## Warning: 程辑包'RColorBrewer'是用R版本4.1.2 来建造的
# ggplot
# Both plots in this section receive house_data through the `data` argument,
# so the second attach(house_data) was dropped: it only re-masked the copy
# attached earlier in the script.
#ggplot(house_data, aes(x=`area`,y=`rent amount`)) + geom_point()
# Rent against area, drawn with blue crosses (shape 3).
ggplot(house_data, aes(x = area, y = `rent amount`)) +
  geom_point(colour = "steelblue", shape = 3) +
  labs(
    x = "squares of area",
    y = "Rent prices in Brazilian Reals",
    title = "House Rent Prices"
  )

# Rent against property tax, drawn with red crosses.
# The x-axis label previously read "Proper Tax".
ggplot(house_data, aes(x = `property tax`, y = `rent amount`)) +
  geom_point(colour = "red", shape = 3) +
  labs(
    x = "Property Tax in Brazilian Reals",
    y = "Rent prices in Brazilian Reals",
    title = "House Rent Prices"
  )

# bar chart
# Number of listings for each room count.
ggplot(data = house_data, mapping = aes(x = rooms)) +
  geom_bar(position = "stack")

# Number of listings for each bathroom count.
ggplot(data = house_data, mapping = aes(x = bathroom)) +
  geom_bar(position = "stack")

# Room counts again, split into one panel per bathroom count.
ggplot(data = house_data, mapping = aes(x = rooms)) +
  geom_bar(position = "dodge") +
  facet_grid(cols = vars(bathroom))

# histogram
# `parking spaces` appears to hold whole-number counts, so one bin per value
# (binwidth = 1) replaces the default of 30 bins. This also silences the
# "`stat_bin()` using `bins = 30`" message.
ggplot(house_data, aes(x = `parking spaces`)) +
  geom_histogram(binwidth = 1)

# Same histogram with each bar shaded by its own count.
ggplot(house_data, aes(x = `parking spaces`)) +
  geom_histogram(aes(fill = after_stat(count)), binwidth = 1)

# Rent against area with a fitted straight line.
ggplot(data = house_data, mapping = aes(x = area, y = `rent amount`)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# Rent against room count with the default smoother.
ggplot(data = house_data, mapping = aes(x = rooms, y = `rent amount`)) +
  geom_point() +
  stat_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Rent against area with the default smoother.
ggplot(data = house_data, mapping = aes(x = area, y = `rent amount`)) +
  geom_point() +
  stat_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Rent against room count with a fitted straight line.
ggplot(data = house_data, mapping = aes(x = rooms, y = `rent amount`)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'

# violin plot
# A violin plot compares distributions across groups, so x must be discrete.
# With a numeric x and no grouping, every row was pooled into one violin.
# Area is continuous, so it is binned into four equally populated ranges.
ggplot(house_data, aes(x = cut_number(area, n = 4), y = `rent amount`)) +
  geom_violin() +
  labs(x = "area (quartile range)")

# Room and bathroom counts become factors: one violin per count.
ggplot(house_data, aes(x = factor(rooms), y = `rent amount`)) +
  geom_violin() +
  labs(x = "rooms")

ggplot(house_data, aes(x = factor(bathroom), y = `rent amount`)) +
  geom_violin() +
  labs(x = "bathroom")

# box plot
# Box plots need a discrete x (or an explicit group). The numeric x used before
# triggered "Continuous x aesthetic" warnings and drew a single pooled box.
# Area is binned into four equally populated ranges; room and bathroom counts
# are converted to factors.

# Horizontal versions (coord_flip swaps the axes).
ggplot(house_data, aes(x = cut_number(area, n = 4), y = `rent amount`)) +
  geom_boxplot() +
  labs(x = "area (quartile range)") +
  coord_flip()

ggplot(house_data, aes(x = factor(rooms), y = `rent amount`)) +
  geom_boxplot() +
  labs(x = "rooms") +
  coord_flip()

ggplot(house_data, aes(x = factor(bathroom), y = `rent amount`)) +
  geom_boxplot() +
  labs(x = "bathroom") +
  coord_flip()

# Vertical versions of the same three plots.
ggplot(house_data, aes(x = cut_number(area, n = 4), y = `rent amount`)) +
  geom_boxplot() +
  labs(x = "area (quartile range)")

ggplot(house_data, aes(x = factor(rooms), y = `rent amount`)) +
  geom_boxplot() +
  labs(x = "rooms")

ggplot(house_data, aes(x = factor(bathroom), y = `rent amount`)) +
  geom_boxplot() +
  labs(x = "bathroom")

# density plot and ggridges
# Overall density of area.
ggplot(house_data, aes(x = area)) +
  geom_density()

# One density curve per room count. `rooms` is numeric, so mapping it directly
# to fill/colour defined no groups and the stat dropped both aesthetics;
# converting it to a factor restores the grouping.
ggplot(house_data, aes(x = area, fill = factor(rooms), color = factor(rooms))) +
  geom_density() +
  labs(fill = "rooms", color = "rooms")

# Same curves, semi-transparent and each scaled to a maximum of 1.
# after_stat(scaled) replaces the deprecated `..scaled..` notation.
ggplot(house_data, aes(x = area, fill = factor(rooms), color = factor(rooms))) +
  geom_density(aes(y = after_stat(scaled)), alpha = 0.3) +
  labs(fill = "rooms", color = "rooms")

# Ridgeline densities of area, one ridge per furnishing status.
ggplot(house_data, aes(x = area, y = furniture)) +
  geom_density_ridges(aes(fill = furniture))
## Picking joint bandwidth of 16.4

# Overall density of rent.
ggplot(house_data, aes(x = `rent amount`)) +
  geom_density()

# Ridgeline densities of rent, one ridge per furnishing status.
ggplot(house_data, aes(x = `rent amount`, y = furniture)) +
  geom_density_ridges(aes(fill = furniture))
## Picking joint bandwidth of 472

# hexbin
# Hexagonal binning of rent against area; fill shows the count in each cell.
ggplot(data = house_data, mapping = aes(x = area, y = `rent amount`)) +
  geom_hex()

# with ggthemes (see also ggsci, ggthemr)
# Base plot reused with each theme below: rent against area, points coloured
# by furnishing status, with the default smoother on top.
# The axis labels and title previously described a diamonds data set
# ("weight of diamond in carats", "Diamond Data"); they now describe the
# columns that are actually plotted.
lastplot <- ggplot(house_data, aes(x = area, y = `rent amount`)) +
  geom_point(aes(color = furniture)) +
  stat_smooth() +
  labs(
    x = "Area",
    y = "Rent prices in Brazilian Reals",
    title = "House Rent Prices"
  )
lastplot + theme_bw()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

lastplot + theme_cowplot()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

lastplot + theme_dark()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

lastplot + theme_economist()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

lastplot + theme_fivethirtyeight()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

lastplot + theme_tufte()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

lastplot + theme_wsj()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
